library(vcfR)
## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.12.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****
library(SNPfiltR)
## This is SNPfiltR v.1.0.0
## 
## Detailed usage information is available at: devonderaad.github.io/SNPfiltR/ 
## 
## If you use SNPfiltR in your published work, please cite the following papers: 
## 
## DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 00, 1-15. http://doi.org/10.1111/1755-0998.13618 
## 
## Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549
library(ggplot2)
library(RColorBrewer)

Filter input SNP files based on smaple inclusion and MAC to increase signal to noise ratio

#read in filtered vcf
vcfR <- read.vcfR("~/Desktop/todi.2022/todi.unlinked.85.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 1892
##   column count: 92
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 1892
##   Character matrix gt cols: 92
##   skip: 0
##   nrows: 1892
##   row_num: 0
## 
Processed variant 1000
Processed variant: 1892
## All variants processed
vcfR
## ***** Object of Class vcfR *****
## 83 samples
## 32 CHROMs
## 1,892 variants
## Object size: 12.1 Mb
## 8.083 percent missing data
## *****        *****         *****
#bring in sample info
#read in sample info csv
sample.info<-read.csv("~/Desktop/todi.2022/todiramphus.subset.csv")
sample.info<-sample.info[sample.info$id %in% colnames(vcfR@gt),]
table(sample.info$species)
## 
##   albicilla     chloris     colonus leucopygius     sanctus saurophagus 
##           6          11           3           5          35           6 
##    sordidus   tristrami 
##           8           9
#see sample order
colnames(vcfR@gt)
##  [1] "FORMAT"                "T_albicilla_22581"     "T_albicilla_22591"    
##  [4] "T_albicilla_22592"     "T_albicilla_22611"     "T_albicilla_85104"    
##  [7] "T_albicilla_85105"     "T_chloris_12584"       "T_chloris_12606"      
## [10] "T_chloris_13960"       "T_chloris_14446"       "T_chloris_20983"      
## [13] "T_chloris_22075"       "T_chloris_23253"       "T_chloris_23630"      
## [16] "T_chloris_23632"       "T_chloris_24382"       "T_chloris_24727"      
## [19] "T_colonus_2003089"     "T_colonus_2003097"     "T_colonus_2004070"    
## [22] "T_leucopygius_32019"   "T_leucopygius_32037"   "T_leucopygius_34505"  
## [25] "T_leucopygius_34508"   "T_leucopygius_DOT6654" "T_sanctus_14877"      
## [28] "T_sanctus_14879"       "T_sanctus_24565"       "T_sanctus_25117"      
## [31] "T_sanctus_33267"       "T_sanctus_34636"       "T_sanctus_34659"      
## [34] "T_sanctus_35549"       "T_sanctus_72545"       "T_sanctus_7567"       
## [37] "T_sanctus_B29435"      "T_sanctus_B32637"      "T_sanctus_B33280"     
## [40] "T_sanctus_B34636"      "T_sanctus_B34659"      "T_sanctus_B34946"     
## [43] "T_sanctus_B43266"      "T_sanctus_B50292"      "T_sanctus_B50543"     
## [46] "T_sanctus_B51072"      "T_sanctus_B53055"      "T_sanctus_B53068"     
## [49] "T_sanctus_B54673"      "T_sanctus_B57402"      "T_sanctus_B59372"     
## [52] "T_sanctus_B60180"      "T_sanctus_B60181"      "T_sanctus_B60182"     
## [55] "T_sanctus_B60183"      "T_sanctus_B60184"      "T_saurophagus_18929"  
## [58] "T_saurophagus_27804"   "T_saurophagus_36179"   "T_saurophagus_36283"  
## [61] "T_saurophagus_36284"   "T_saurophagus_69666"   "T_sordidus_B33717"    
## [64] "T_sordidus_B33718"     "T_sordidus_B33719"     "T_sordidus_B33720"    
## [67] "T_sordidus_B44198"     "T_sordidus_B44295"     "T_sordidus_B44296"    
## [70] "T_sordidus_B51462"     "T_tristrami_18953"     "T_tristrami_27723"    
## [73] "T_tristrami_27752"     "T_tristrami_27792"     "T_tristrami_27793"    
## [76] "T_tristrami_32049"     "T_tristrami_33253"     "T_tristrami_33839"    
## [79] "T_tristrami_33858"     "T_tristrami_33864"     "T_tristrami_33867"    
## [82] "T_tristrami_33895"     "T_tristrami_36188"     "T_tristrami_6704"
#try just removing leucopygius
vcf.2<-vcfR[,c(1:21,27:84)]
colnames(vcf.2@gt)
##  [1] "FORMAT"              "T_albicilla_22581"   "T_albicilla_22591"  
##  [4] "T_albicilla_22592"   "T_albicilla_22611"   "T_albicilla_85104"  
##  [7] "T_albicilla_85105"   "T_chloris_12584"     "T_chloris_12606"    
## [10] "T_chloris_13960"     "T_chloris_14446"     "T_chloris_20983"    
## [13] "T_chloris_22075"     "T_chloris_23253"     "T_chloris_23630"    
## [16] "T_chloris_23632"     "T_chloris_24382"     "T_chloris_24727"    
## [19] "T_colonus_2003089"   "T_colonus_2003097"   "T_colonus_2004070"  
## [22] "T_sanctus_14877"     "T_sanctus_14879"     "T_sanctus_24565"    
## [25] "T_sanctus_25117"     "T_sanctus_33267"     "T_sanctus_34636"    
## [28] "T_sanctus_34659"     "T_sanctus_35549"     "T_sanctus_72545"    
## [31] "T_sanctus_7567"      "T_sanctus_B29435"    "T_sanctus_B32637"   
## [34] "T_sanctus_B33280"    "T_sanctus_B34636"    "T_sanctus_B34659"   
## [37] "T_sanctus_B34946"    "T_sanctus_B43266"    "T_sanctus_B50292"   
## [40] "T_sanctus_B50543"    "T_sanctus_B51072"    "T_sanctus_B53055"   
## [43] "T_sanctus_B53068"    "T_sanctus_B54673"    "T_sanctus_B57402"   
## [46] "T_sanctus_B59372"    "T_sanctus_B60180"    "T_sanctus_B60181"   
## [49] "T_sanctus_B60182"    "T_sanctus_B60183"    "T_sanctus_B60184"   
## [52] "T_saurophagus_18929" "T_saurophagus_27804" "T_saurophagus_36179"
## [55] "T_saurophagus_36283" "T_saurophagus_36284" "T_saurophagus_69666"
## [58] "T_sordidus_B33717"   "T_sordidus_B33718"   "T_sordidus_B33719"  
## [61] "T_sordidus_B33720"   "T_sordidus_B44198"   "T_sordidus_B44295"  
## [64] "T_sordidus_B44296"   "T_sordidus_B51462"   "T_tristrami_18953"  
## [67] "T_tristrami_27723"   "T_tristrami_27752"   "T_tristrami_27792"  
## [70] "T_tristrami_27793"   "T_tristrami_32049"   "T_tristrami_33253"  
## [73] "T_tristrami_33839"   "T_tristrami_33858"   "T_tristrami_33864"  
## [76] "T_tristrami_33867"   "T_tristrami_33895"   "T_tristrami_36188"  
## [79] "T_tristrami_6704"
#remove invariant snps 
vcf.2<-min_mac(vcf.2, min.mac = 1)
## 17.02% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#vcfR::write.vcf(vcf.2, file="~/Downloads/todi.noleuco.vcf.gz")
vcf.2
## ***** Object of Class vcfR *****
## 78 samples
## 32 CHROMs
## 1,570 variants
## Object size: 9.7 Mb
## 7.197 percent missing data
## *****        *****         *****
#try removing sanctus and removing leucopygius
vcf.2a<-vcfR[,c(1:21,57:77,83,84)]
colnames(vcf.2a@gt)
##  [1] "FORMAT"              "T_albicilla_22581"   "T_albicilla_22591"  
##  [4] "T_albicilla_22592"   "T_albicilla_22611"   "T_albicilla_85104"  
##  [7] "T_albicilla_85105"   "T_chloris_12584"     "T_chloris_12606"    
## [10] "T_chloris_13960"     "T_chloris_14446"     "T_chloris_20983"    
## [13] "T_chloris_22075"     "T_chloris_23253"     "T_chloris_23630"    
## [16] "T_chloris_23632"     "T_chloris_24382"     "T_chloris_24727"    
## [19] "T_colonus_2003089"   "T_colonus_2003097"   "T_colonus_2004070"  
## [22] "T_saurophagus_18929" "T_saurophagus_27804" "T_saurophagus_36179"
## [25] "T_saurophagus_36283" "T_saurophagus_36284" "T_saurophagus_69666"
## [28] "T_sordidus_B33717"   "T_sordidus_B33718"   "T_sordidus_B33719"  
## [31] "T_sordidus_B33720"   "T_sordidus_B44198"   "T_sordidus_B44295"  
## [34] "T_sordidus_B44296"   "T_sordidus_B51462"   "T_tristrami_18953"  
## [37] "T_tristrami_27723"   "T_tristrami_27752"   "T_tristrami_27792"  
## [40] "T_tristrami_27793"   "T_tristrami_32049"   "T_tristrami_33253"  
## [43] "T_tristrami_36188"   "T_tristrami_6704"
#remove invariant snps 
vcf.2a<-min_mac(vcf.2a, min.mac = 1)
## 60.04% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#vcfR::write.vcf(vcf.2a, file="~/Downloads/todi.nosanctus.vcf.gz")
vcf.2a
## ***** Object of Class vcfR *****
## 43 samples
## 26 CHROMs
## 756 variants
## Object size: 2.8 Mb
## 7.709 percent missing data
## *****        *****         *****
#set up pairwise comparisons between sanctus and other taxa
vcf.2a<-vcfR[,c(1,27:56,78:82,63:70)]
colnames(vcf.2a@gt)
##  [1] "FORMAT"            "T_sanctus_14877"   "T_sanctus_14879"  
##  [4] "T_sanctus_24565"   "T_sanctus_25117"   "T_sanctus_33267"  
##  [7] "T_sanctus_34636"   "T_sanctus_34659"   "T_sanctus_35549"  
## [10] "T_sanctus_72545"   "T_sanctus_7567"    "T_sanctus_B29435" 
## [13] "T_sanctus_B32637"  "T_sanctus_B33280"  "T_sanctus_B34636" 
## [16] "T_sanctus_B34659"  "T_sanctus_B34946"  "T_sanctus_B43266" 
## [19] "T_sanctus_B50292"  "T_sanctus_B50543"  "T_sanctus_B51072" 
## [22] "T_sanctus_B53055"  "T_sanctus_B53068"  "T_sanctus_B54673" 
## [25] "T_sanctus_B57402"  "T_sanctus_B59372"  "T_sanctus_B60180" 
## [28] "T_sanctus_B60181"  "T_sanctus_B60182"  "T_sanctus_B60183" 
## [31] "T_sanctus_B60184"  "T_tristrami_33839" "T_tristrami_33858"
## [34] "T_tristrami_33864" "T_tristrami_33867" "T_tristrami_33895"
## [37] "T_sordidus_B33717" "T_sordidus_B33718" "T_sordidus_B33719"
## [40] "T_sordidus_B33720" "T_sordidus_B44198" "T_sordidus_B44295"
## [43] "T_sordidus_B44296" "T_sordidus_B51462"
#remove invariant snps 
vcf.2a<-min_mac(vcf.2a, min.mac = 1)
## 40.96% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#vcfR::write.vcf(vcf.2a, file="~/Downloads/todi.san.sord.vcf.gz")
vcf.2a
## ***** Object of Class vcfR *****
## 43 samples
## 30 CHROMs
## 1,117 variants
## Object size: 4.2 Mb
## 6.766 percent missing data
## *****        *****         *****
#set up pairwise comparisons between sanctus and other taxa
vcf.2a<-vcfR[,c(1,27:56,78:82,71:77,83,84)]
colnames(vcf.2a@gt)
##  [1] "FORMAT"            "T_sanctus_14877"   "T_sanctus_14879"  
##  [4] "T_sanctus_24565"   "T_sanctus_25117"   "T_sanctus_33267"  
##  [7] "T_sanctus_34636"   "T_sanctus_34659"   "T_sanctus_35549"  
## [10] "T_sanctus_72545"   "T_sanctus_7567"    "T_sanctus_B29435" 
## [13] "T_sanctus_B32637"  "T_sanctus_B33280"  "T_sanctus_B34636" 
## [16] "T_sanctus_B34659"  "T_sanctus_B34946"  "T_sanctus_B43266" 
## [19] "T_sanctus_B50292"  "T_sanctus_B50543"  "T_sanctus_B51072" 
## [22] "T_sanctus_B53055"  "T_sanctus_B53068"  "T_sanctus_B54673" 
## [25] "T_sanctus_B57402"  "T_sanctus_B59372"  "T_sanctus_B60180" 
## [28] "T_sanctus_B60181"  "T_sanctus_B60182"  "T_sanctus_B60183" 
## [31] "T_sanctus_B60184"  "T_tristrami_33839" "T_tristrami_33858"
## [34] "T_tristrami_33864" "T_tristrami_33867" "T_tristrami_33895"
## [37] "T_tristrami_18953" "T_tristrami_27723" "T_tristrami_27752"
## [40] "T_tristrami_27792" "T_tristrami_27793" "T_tristrami_32049"
## [43] "T_tristrami_33253" "T_tristrami_36188" "T_tristrami_6704"
#remove invariant snps 
vcf.2a<-min_mac(vcf.2a, min.mac = 1)
## 38.32% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#vcfR::write.vcf(vcf.2a, file="~/Downloads/todi.san.tris.vcf.gz")
vcf.2a
## ***** Object of Class vcfR *****
## 44 samples
## 31 CHROMs
## 1,167 variants
## Object size: 4.5 Mb
## 7.523 percent missing data
## *****        *****         *****
#set up pairwise comparisons between sanctus and saur
vcf.2a<-vcfR[,c(1,27:56,78:82,57:62)]
colnames(vcf.2a@gt)
##  [1] "FORMAT"              "T_sanctus_14877"     "T_sanctus_14879"    
##  [4] "T_sanctus_24565"     "T_sanctus_25117"     "T_sanctus_33267"    
##  [7] "T_sanctus_34636"     "T_sanctus_34659"     "T_sanctus_35549"    
## [10] "T_sanctus_72545"     "T_sanctus_7567"      "T_sanctus_B29435"   
## [13] "T_sanctus_B32637"    "T_sanctus_B33280"    "T_sanctus_B34636"   
## [16] "T_sanctus_B34659"    "T_sanctus_B34946"    "T_sanctus_B43266"   
## [19] "T_sanctus_B50292"    "T_sanctus_B50543"    "T_sanctus_B51072"   
## [22] "T_sanctus_B53055"    "T_sanctus_B53068"    "T_sanctus_B54673"   
## [25] "T_sanctus_B57402"    "T_sanctus_B59372"    "T_sanctus_B60180"   
## [28] "T_sanctus_B60181"    "T_sanctus_B60182"    "T_sanctus_B60183"   
## [31] "T_sanctus_B60184"    "T_tristrami_33839"   "T_tristrami_33858"  
## [34] "T_tristrami_33864"   "T_tristrami_33867"   "T_tristrami_33895"  
## [37] "T_saurophagus_18929" "T_saurophagus_27804" "T_saurophagus_36179"
## [40] "T_saurophagus_36283" "T_saurophagus_36284" "T_saurophagus_69666"
#remove invariant snps 
vcf.2a<-min_mac(vcf.2a, min.mac = 1)
## 43.97% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#vcfR::write.vcf(vcf.2a, file="~/Downloads/todi.san.saur.vcf.gz")
vcf.2a
## ***** Object of Class vcfR *****
## 41 samples
## 30 CHROMs
## 1,060 variants
## Object size: 3.9 Mb
## 7.563 percent missing data
## *****        *****         *****

Code to convert the vcf into appropriate file structure and run ADMIXTURE on the cluster

#!/bin/sh
#
#SBATCH --job-name=admixture.prim              # Job Name
#SBATCH --nodes=1             # 40 nodes
#SBATCH --ntasks-per-node=5               # 40 CPU allocation per Task
#SBATCH --partition=sixhour            # Name of the Slurm partition used
#SBATCH --chdir=/home/d669d153/work/todi.subset.2022/revision.admixture/primary     # Set working d$
#SBATCH --mem-per-cpu=1gb            # memory requested
#SBATCH --time=200

#use plink to convert vcf directly to bed format:
/home/d669d153/work/plink --vcf /home/d669d153/work/todi.subset.2022/revision.admixture/primary/todi.downsamplesanctus.5.vcf  --double-id --allow-extra-chr --make-bed --out binary_fileset
#fix chromosome names
cut -f2- binary_fileset.bim  > temp
awk 'BEGIN{FS=OFS="\t"}{print value 1 OFS $0}' temp > binary_fileset.bim
rm temp

#run admixture for a K of 1-10, using cross-validation, with 10 threads
for K in 1 2 3 4 5 6 7 8 9 10; 
do /home/d669d153/work/admixture_linux-1.3.0/admixture --cv -j5 -m EM binary_fileset.bed $K | tee log${K}.out;
done

#Which K iteration is optimal according to ADMIXTURE ?
grep -h CV log*.out > log.errors.txt

check out the run with all samples included

#setwd to admixture directory run on the cluster
setwd("~/Downloads/no.leuco/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$id),]
sample.info$id == samps
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#show sample order
samps
##  [1] "T_albicilla_22581"   "T_albicilla_22591"   "T_albicilla_22592"  
##  [4] "T_albicilla_22611"   "T_albicilla_85104"   "T_albicilla_85105"  
##  [7] "T_chloris_12584"     "T_chloris_12606"     "T_chloris_13960"    
## [10] "T_chloris_14446"     "T_chloris_20983"     "T_chloris_22075"    
## [13] "T_chloris_23253"     "T_chloris_23630"     "T_chloris_23632"    
## [16] "T_chloris_24382"     "T_chloris_24727"     "T_colonus_2003089"  
## [19] "T_colonus_2003097"   "T_colonus_2004070"   "T_sanctus_14877"    
## [22] "T_sanctus_14879"     "T_sanctus_24565"     "T_sanctus_25117"    
## [25] "T_sanctus_33267"     "T_sanctus_34636"     "T_sanctus_34659"    
## [28] "T_sanctus_35549"     "T_sanctus_72545"     "T_sanctus_7567"     
## [31] "T_sanctus_B29435"    "T_sanctus_B32637"    "T_sanctus_B33280"   
## [34] "T_sanctus_B34636"    "T_sanctus_B34659"    "T_sanctus_B34946"   
## [37] "T_sanctus_B43266"    "T_sanctus_B50292"    "T_sanctus_B50543"   
## [40] "T_sanctus_B51072"    "T_sanctus_B53055"    "T_sanctus_B53068"   
## [43] "T_sanctus_B54673"    "T_sanctus_B57402"    "T_sanctus_B59372"   
## [46] "T_sanctus_B60180"    "T_sanctus_B60181"    "T_sanctus_B60182"   
## [49] "T_sanctus_B60183"    "T_sanctus_B60184"    "T_saurophagus_18929"
## [52] "T_saurophagus_27804" "T_saurophagus_36179" "T_saurophagus_36283"
## [55] "T_saurophagus_36284" "T_saurophagus_69666" "T_sordidus_B33717"  
## [58] "T_sordidus_B33718"   "T_sordidus_B33719"   "T_sordidus_B33720"  
## [61] "T_sordidus_B44198"   "T_sordidus_B44295"   "T_sordidus_B44296"  
## [64] "T_sordidus_B51462"   "T_tristrami_18953"   "T_tristrami_27723"  
## [67] "T_tristrami_27752"   "T_tristrami_27792"   "T_tristrami_27793"  
## [70] "T_tristrami_32049"   "T_tristrami_33253"   "T_tristrami_33839"  
## [73] "T_tristrami_33858"   "T_tristrami_33864"   "T_tristrami_33867"  
## [76] "T_tristrami_33895"   "T_tristrami_36188"   "T_tristrami_6704"
samps[c(1:6,51:56,65:71,77,78,18:20,57:64,7:17,72:76,21:50)]
##  [1] "T_albicilla_22581"   "T_albicilla_22591"   "T_albicilla_22592"  
##  [4] "T_albicilla_22611"   "T_albicilla_85104"   "T_albicilla_85105"  
##  [7] "T_saurophagus_18929" "T_saurophagus_27804" "T_saurophagus_36179"
## [10] "T_saurophagus_36283" "T_saurophagus_36284" "T_saurophagus_69666"
## [13] "T_tristrami_18953"   "T_tristrami_27723"   "T_tristrami_27752"  
## [16] "T_tristrami_27792"   "T_tristrami_27793"   "T_tristrami_32049"  
## [19] "T_tristrami_33253"   "T_tristrami_36188"   "T_tristrami_6704"   
## [22] "T_colonus_2003089"   "T_colonus_2003097"   "T_colonus_2004070"  
## [25] "T_sordidus_B33717"   "T_sordidus_B33718"   "T_sordidus_B33719"  
## [28] "T_sordidus_B33720"   "T_sordidus_B44198"   "T_sordidus_B44295"  
## [31] "T_sordidus_B44296"   "T_sordidus_B51462"   "T_chloris_12584"    
## [34] "T_chloris_12606"     "T_chloris_13960"     "T_chloris_14446"    
## [37] "T_chloris_20983"     "T_chloris_22075"     "T_chloris_23253"    
## [40] "T_chloris_23630"     "T_chloris_23632"     "T_chloris_24382"    
## [43] "T_chloris_24727"     "T_tristrami_33839"   "T_tristrami_33858"  
## [46] "T_tristrami_33864"   "T_tristrami_33867"   "T_tristrami_33895"  
## [49] "T_sanctus_14877"     "T_sanctus_14879"     "T_sanctus_24565"    
## [52] "T_sanctus_25117"     "T_sanctus_33267"     "T_sanctus_34636"    
## [55] "T_sanctus_34659"     "T_sanctus_35549"     "T_sanctus_72545"    
## [58] "T_sanctus_7567"      "T_sanctus_B29435"    "T_sanctus_B32637"   
## [61] "T_sanctus_B33280"    "T_sanctus_B34636"    "T_sanctus_B34659"   
## [64] "T_sanctus_B34946"    "T_sanctus_B43266"    "T_sanctus_B50292"   
## [67] "T_sanctus_B50543"    "T_sanctus_B51072"    "T_sanctus_B53055"   
## [70] "T_sanctus_B53068"    "T_sanctus_B54673"    "T_sanctus_B57402"   
## [73] "T_sanctus_B59372"    "T_sanctus_B60180"    "T_sanctus_B60181"   
## [76] "T_sanctus_B60182"    "T_sanctus_B60183"    "T_sanctus_B60184"
#color code
brewer.pal(8, "Set2")
## [1] "#66C2A5" "#FC8D62" "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494"
## [8] "#B3B3B3"
barplot(t(as.matrix(runs[[5]]))[,c(1:6,51:56,65:71,77,78,18:20,57:64,7:17,72:76,21:50)], col=c("#E5C494","#A6D854","#FFD92F","#FC8D62","#B3B3B3","#66C2A5","#8DA0CB"), ylab="Ancestry", border="black")

#save barplots
#pdf("~/Desktop/admix.all.pdf", width = 8, height=3)
#barplot(t(as.matrix(runs[[5]]))[,c(1:6,51:56,65:71,77,78,18:20,57:64,7:17,72:76,21:50)], col=c("#E5C494","#A6D854","#FFD92F","#FC8D62","#B3B3B3","#66C2A5","#8DA0CB"), ylab="Ancestry", border="black")
#dev.off()

now check out the results with no sanctus

#setwd to admixture directory run on the cluster
setwd("~/Downloads/nosanctus/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$id),]
sample.info$id == samps
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#show sample order
samps
##  [1] "T_albicilla_22581"   "T_albicilla_22591"   "T_albicilla_22592"  
##  [4] "T_albicilla_22611"   "T_albicilla_85104"   "T_albicilla_85105"  
##  [7] "T_chloris_12584"     "T_chloris_12606"     "T_chloris_13960"    
## [10] "T_chloris_14446"     "T_chloris_20983"     "T_chloris_22075"    
## [13] "T_chloris_23253"     "T_chloris_23630"     "T_chloris_23632"    
## [16] "T_chloris_24382"     "T_chloris_24727"     "T_colonus_2003089"  
## [19] "T_colonus_2003097"   "T_colonus_2004070"   "T_saurophagus_18929"
## [22] "T_saurophagus_27804" "T_saurophagus_36179" "T_saurophagus_36283"
## [25] "T_saurophagus_36284" "T_saurophagus_69666" "T_sordidus_B33717"  
## [28] "T_sordidus_B33718"   "T_sordidus_B33719"   "T_sordidus_B33720"  
## [31] "T_sordidus_B44198"   "T_sordidus_B44295"   "T_sordidus_B44296"  
## [34] "T_sordidus_B51462"   "T_tristrami_18953"   "T_tristrami_27723"  
## [37] "T_tristrami_27752"   "T_tristrami_27792"   "T_tristrami_27793"  
## [40] "T_tristrami_32049"   "T_tristrami_33253"   "T_tristrami_36188"  
## [43] "T_tristrami_6704"
samps[c(1:6,21:26,35:43,18:20,27:34,7,8,12,13,16,17,9:11,14,15)]
##  [1] "T_albicilla_22581"   "T_albicilla_22591"   "T_albicilla_22592"  
##  [4] "T_albicilla_22611"   "T_albicilla_85104"   "T_albicilla_85105"  
##  [7] "T_saurophagus_18929" "T_saurophagus_27804" "T_saurophagus_36179"
## [10] "T_saurophagus_36283" "T_saurophagus_36284" "T_saurophagus_69666"
## [13] "T_tristrami_18953"   "T_tristrami_27723"   "T_tristrami_27752"  
## [16] "T_tristrami_27792"   "T_tristrami_27793"   "T_tristrami_32049"  
## [19] "T_tristrami_33253"   "T_tristrami_36188"   "T_tristrami_6704"   
## [22] "T_colonus_2003089"   "T_colonus_2003097"   "T_colonus_2004070"  
## [25] "T_sordidus_B33717"   "T_sordidus_B33718"   "T_sordidus_B33719"  
## [28] "T_sordidus_B33720"   "T_sordidus_B44198"   "T_sordidus_B44295"  
## [31] "T_sordidus_B44296"   "T_sordidus_B51462"   "T_chloris_12584"    
## [34] "T_chloris_12606"     "T_chloris_22075"     "T_chloris_23253"    
## [37] "T_chloris_24382"     "T_chloris_24727"     "T_chloris_13960"    
## [40] "T_chloris_14446"     "T_chloris_20983"     "T_chloris_23630"    
## [43] "T_chloris_23632"
#color code
brewer.pal(8, "Set2")
## [1] "#66C2A5" "#FC8D62" "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494"
## [8] "#B3B3B3"
barplot(t(as.matrix(runs[[8]]))[,c(1:6,21:26,35:43,18:20,27:34,7,8,12,13,16,17,9:11,14,15)], col=c("#FFD92F","#66C2A5","black","#8DA0CB","#B3B3B3","#E5C494","#FC8D62","white"), ylab="Ancestry", border="black")

#save barplots
#pdf("~/Desktop/admix.sub.pdf", width = 8, height=3)
#barplot(t(as.matrix(runs[[8]]))[,c(1:6,21:26,35:43,18:20,27:34,7,8,12,13,16,17,9:11,14,15)], #col=c("#FFD92F","#66C2A5","black","#8DA0CB","#B3B3B3","#E5C494","#FC8D62","white"), ylab="Ancestry", border="black")
#dev.off()

now check out pairwise comparisons between sympatric taxa and sanctus

#setwd to admixture directory run on the cluster
setwd("~/Downloads/san.sord")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$id),]
sample.info$id == samps
##  [1]   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## [16]   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## [31]   NA   NA   NA   NA   NA TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#show sample order
samps
##  [1] "T_sanctus_14877"   "T_sanctus_14879"   "T_sanctus_24565"  
##  [4] "T_sanctus_25117"   "T_sanctus_33267"   "T_sanctus_34636"  
##  [7] "T_sanctus_34659"   "T_sanctus_35549"   "T_sanctus_72545"  
## [10] "T_sanctus_7567"    "T_sanctus_B29435"  "T_sanctus_B32637" 
## [13] "T_sanctus_B33280"  "T_sanctus_B34636"  "T_sanctus_B34659" 
## [16] "T_sanctus_B34946"  "T_sanctus_B43266"  "T_sanctus_B50292" 
## [19] "T_sanctus_B50543"  "T_sanctus_B51072"  "T_sanctus_B53055" 
## [22] "T_sanctus_B53068"  "T_sanctus_B54673"  "T_sanctus_B57402" 
## [25] "T_sanctus_B59372"  "T_sanctus_B60180"  "T_sanctus_B60181" 
## [28] "T_sanctus_B60182"  "T_sanctus_B60183"  "T_sanctus_B60184" 
## [31] "T_tristrami_33839" "T_tristrami_33858" "T_tristrami_33864"
## [34] "T_tristrami_33867" "T_tristrami_33895" "T_sordidus_B33717"
## [37] "T_sordidus_B33718" "T_sordidus_B33719" "T_sordidus_B33720"
## [40] "T_sordidus_B44198" "T_sordidus_B44295" "T_sordidus_B44296"
## [43] "T_sordidus_B51462"
#color code
brewer.pal(8, "Set2")
## [1] "#66C2A5" "#FC8D62" "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494"
## [8] "#B3B3B3"
barplot(t(as.matrix(runs[[2]])), col=c("#A6D854","#E5C494"), ylab="Ancestry", border="black")

#save barplots
#pdf("~/Desktop/admix.san.sord.pdf", width = 8, height=2.8)
#barplot(t(as.matrix(runs[[2]])), col=c("#A6D854","#E5C494"), ylab="Ancestry", border="black")
#dev.off()

now check out pairwise comparisons between sympatric taxa and sanctus

#setwd to admixture directory run on the cluster
setwd("~/Downloads/san.tris")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$id),]
sample.info$id == samps
##  [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#show sample order
samps
##  [1] "T_sanctus_14877"   "T_sanctus_14879"   "T_sanctus_24565"  
##  [4] "T_sanctus_25117"   "T_sanctus_33267"   "T_sanctus_34636"  
##  [7] "T_sanctus_34659"   "T_sanctus_35549"   "T_sanctus_72545"  
## [10] "T_sanctus_7567"    "T_sanctus_B29435"  "T_sanctus_B32637" 
## [13] "T_sanctus_B33280"  "T_sanctus_B34636"  "T_sanctus_B34659" 
## [16] "T_sanctus_B34946"  "T_sanctus_B43266"  "T_sanctus_B50292" 
## [19] "T_sanctus_B50543"  "T_sanctus_B51072"  "T_sanctus_B53055" 
## [22] "T_sanctus_B53068"  "T_sanctus_B54673"  "T_sanctus_B57402" 
## [25] "T_sanctus_B59372"  "T_sanctus_B60180"  "T_sanctus_B60181" 
## [28] "T_sanctus_B60182"  "T_sanctus_B60183"  "T_sanctus_B60184" 
## [31] "T_tristrami_33839" "T_tristrami_33858" "T_tristrami_33864"
## [34] "T_tristrami_33867" "T_tristrami_33895" "T_tristrami_18953"
## [37] "T_tristrami_27723" "T_tristrami_27752" "T_tristrami_27792"
## [40] "T_tristrami_27793" "T_tristrami_32049" "T_tristrami_33253"
## [43] "T_tristrami_36188" "T_tristrami_6704"
brewer.pal(8, "Set2")
## [1] "#66C2A5" "#FC8D62" "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494"
## [8] "#B3B3B3"
barplot(t(as.matrix(runs[[2]])), col=c("#B3B3B3","#A6D854"), ylab="Ancestry", border="black")

#save barplots
#pdf("~/Desktop/admix.san.tris.pdf", width = 8, height=2.8)
#barplot(t(as.matrix(runs[[2]])), col=c("#B3B3B3","#A6D854"), ylab="Ancestry", border="black")
#dev.off()

now check out pairwise comparisons between sympatric taxa and sanctus

#setwd to admixture directory run on the cluster
setwd("~/Downloads/san.saur")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$id),]
sample.info$id == samps
##  [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#show sample order
samps
##  [1] "T_sanctus_14877"     "T_sanctus_14879"     "T_sanctus_24565"    
##  [4] "T_sanctus_25117"     "T_sanctus_33267"     "T_sanctus_34636"    
##  [7] "T_sanctus_34659"     "T_sanctus_35549"     "T_sanctus_72545"    
## [10] "T_sanctus_7567"      "T_sanctus_B29435"    "T_sanctus_B32637"   
## [13] "T_sanctus_B33280"    "T_sanctus_B34636"    "T_sanctus_B34659"   
## [16] "T_sanctus_B34946"    "T_sanctus_B43266"    "T_sanctus_B50292"   
## [19] "T_sanctus_B50543"    "T_sanctus_B51072"    "T_sanctus_B53055"   
## [22] "T_sanctus_B53068"    "T_sanctus_B54673"    "T_sanctus_B57402"   
## [25] "T_sanctus_B59372"    "T_sanctus_B60180"    "T_sanctus_B60181"   
## [28] "T_sanctus_B60182"    "T_sanctus_B60183"    "T_sanctus_B60184"   
## [31] "T_tristrami_33839"   "T_tristrami_33858"   "T_tristrami_33864"  
## [34] "T_tristrami_33867"   "T_tristrami_33895"   "T_saurophagus_18929"
## [37] "T_saurophagus_27804" "T_saurophagus_36179" "T_saurophagus_36283"
## [40] "T_saurophagus_36284" "T_saurophagus_69666"
brewer.pal(8, "Set2")
## [1] "#66C2A5" "#FC8D62" "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494"
## [8] "#B3B3B3"
barplot(t(as.matrix(runs[[2]])), col=c("#FFD92F","#A6D854"), ylab="Ancestry", border="black")

#save barplots
#pdf("~/Desktop/admix.san.saur.pdf", width = 8, height=2.8)
#barplot(t(as.matrix(runs[[2]])), col=c("#FFD92F","#A6D854"), ylab="Ancestry", border="black")
#dev.off()